In [1]:
%matplotlib inline

import os
import pathlib

import arrow
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import dota.helpers as h
from dota import api
import dota.sql.orm as o

data_dir = pathlib.Path(os.path.expanduser('~/sandbox/dota/data/pro')) games = cached_games(data_dir)

Limit to 6.80 and filter modes with picks or bans

_680 = arrow.get(1381989600)

pbs = (api.DetailsResponse.from_json(str(data_dir) + '/' + str(x) + '.json') for x in games) pbs = (dr for dr in pbs if getattr(dr, 'picks_bans') is not None and dr.start_time > _680) df = pd.concat(x.picks_bans for x in pbs) df['hero_id'] = df.hero_id.replace(api._hero_id_to_names) df.head()

picks = df[df.is_pick]['hero_id'].value_counts() bans = df[~df.is_pick]['hero_id'].value_counts() either = picks + bans either.sort()

resort for plot

picks = picks.reindex_like(either) bans = bans.reindex_like(either)

fig, axes = plt.subplots(figsize=(20, 40), ncols=3) either.plot(kind='barh', ax=axes[0], label='Either') picks.plot(kind='barh', ax=axes[1], label='Picks') bans.plot(kind='barh', ax=axes[2], label='Bans')

Model Hero Picks / Bans


In [2]:
def load_frame(data_dir):
    if isinstance(data_dir, str):
        data_dir = pathlib.Path(os.path.expanduser(data_dir))

    games = h.cached_games(data_dir)
    _680 = arrow.get(1381989600)  # TODO: take as arg

    pbs = (api.DetailsResponse.from_json(str(x)) for x in games)
    pbs = (dr for dr in pbs if getattr(dr, 'picks_bans') is not None and dr.start_time > _680
           and dr.game_mode == 2)

    dfs = []

    for dr in pbs:
        df = dr.picks_bans.copy()
        df.loc[df.team == 0, 'team_name'] = getattr(dr, 'radiant_name', np.nan)
        df.loc[df.team == 0, 'team_id'] = getattr(dr, 'radiant_team_id', np.nan)
    
        df.loc[df.team == 1, 'team_name'] = getattr(dr, 'dire_name', np.nan)
        df.loc[df.team == 1, 'team_id'] = getattr(dr, 'dire_team_id', np.nan)
        df['match_id'] = dr.match_id
        dfs.append(df)

    df = pd.concat(dfs, ignore_index=True)
    df = df.replace({None: np.nan})
    df = h.pb_only_complete_drafts(df)
    df['team_id_f'], team_id_labels = pd.factorize(df.team_id)
    df['hero_id_f'], hero_id_labels = pd.factorize(df.hero_id)

    return df

In [3]:
df = load_frame('~/sandbox/dota/data/pro/')

In [4]:
df.head(10)


Out[4]:
hero_id is_pick order team team_name team_id match_id team_id_f hero_id_f
0 89 False 0 0 LGD-GAMING.int 77666 347983846 0 0
1 65 False 1 1 TongFu.WanZhou 995707 347983846 1 1
2 95 False 2 0 LGD-GAMING.int 77666 347983846 0 2
3 76 False 3 1 TongFu.WanZhou 995707 347983846 1 3
4 103 True 4 0 LGD-GAMING.int 77666 347983846 0 4
5 91 True 5 1 TongFu.WanZhou 995707 347983846 1 5
6 63 True 6 1 TongFu.WanZhou 995707 347983846 1 6
7 86 True 7 0 LGD-GAMING.int 77666 347983846 0 7
8 81 False 8 0 LGD-GAMING.int 77666 347983846 0 8
9 53 False 9 1 TongFu.WanZhou 995707 347983846 1 9

10 rows × 9 columns

We can't just plug in hero_id as a feature since the integers imply an ordering. In reality hero_id=0 is no "closer" to hero_id=1 than to hero_id=99.

We'll try out a feature hashing or dict vectorizer.


In [6]:
from sklearn.feature_extraction import DictVectorizer

api._hero_id_to_names


Out[6]:
{1: 'antimage',
 2: 'axe',
 3: 'bane',
 4: 'bloodseeker',
 5: 'crystal_maiden',
 6: 'drow_ranger',
 7: 'earthshaker',
 8: 'juggernaut',
 9: 'mirana',
 10: 'morphling',
 11: 'nevermore',
 12: 'phantom_lancer',
 13: 'puck',
 14: 'pudge',
 15: 'razor',
 16: 'sand_king',
 17: 'storm_spirit',
 18: 'sven',
 19: 'tiny',
 20: 'vengefulspirit',
 21: 'windrunner',
 22: 'zuus',
 23: 'kunkka',
 25: 'lina',
 26: 'lion',
 27: 'shadow_shaman',
 28: 'slardar',
 29: 'tidehunter',
 30: 'witch_doctor',
 31: 'lich',
 32: 'riki',
 33: 'enigma',
 34: 'tinker',
 35: 'sniper',
 36: 'necrolyte',
 37: 'warlock',
 38: 'beastmaster',
 39: 'queenofpain',
 40: 'venomancer',
 41: 'faceless_void',
 42: 'skeleton_king',
 43: 'death_prophet',
 44: 'phantom_assassin',
 45: 'pugna',
 46: 'templar_assassin',
 47: 'viper',
 48: 'luna',
 49: 'dragon_knight',
 50: 'dazzle',
 51: 'rattletrap',
 52: 'leshrac',
 53: 'furion',
 54: 'life_stealer',
 55: 'dark_seer',
 56: 'clinkz',
 57: 'omniknight',
 58: 'enchantress',
 59: 'huskar',
 60: 'night_stalker',
 61: 'broodmother',
 62: 'bounty_hunter',
 63: 'weaver',
 64: 'jakiro',
 65: 'batrider',
 66: 'chen',
 67: 'spectre',
 68: 'ancient_apparition',
 69: 'doom_bringer',
 70: 'ursa',
 71: 'spirit_breaker',
 72: 'gyrocopter',
 73: 'alchemist',
 74: 'invoker',
 75: 'silencer',
 76: 'obsidian_destroyer',
 77: 'lycan',
 78: 'brewmaster',
 79: 'shadow_demon',
 80: 'lone_druid',
 81: 'chaos_knight',
 82: 'meepo',
 83: 'treant',
 84: 'ogre_magi',
 85: 'undying',
 86: 'rubick',
 87: 'disruptor',
 88: 'nyx_assassin',
 89: 'naga_siren',
 90: 'keeper_of_the_light',
 91: 'wisp',
 92: 'visage',
 93: 'slark',
 94: 'medusa',
 95: 'troll_warlord',
 96: 'centaur',
 97: 'magnataur',
 98: 'shredder',
 99: 'bristleback',
 100: 'tusk',
 101: 'skywrath_mage',
 102: 'abaddon',
 103: 'elder_titan',
 104: 'legion_commander',
 106: 'ember_spirit',
 107: 'earth_spirit',
 108: 'abyssal_underlord',
 109: 'terrorblade',
 110: 'phoenix'}

In [396]:
# predict first ban by banner and bannee teams

endog = df.query('order == 0')['hero_id_f']
exog = df.query('order == 0')[['team', 'team_id_f']]
exog['team_id_a'] = df.loc[endog.index + 1, 'team_id_f'].values # labels are off by one

exog = exog[(exog >= 0).all(1)]  
good_idx = exog.index

y = endog.loc[good_idx].values
X = exog.loc[good_idx].values

In [397]:
from sklearn.naive_bayes import GaussianNB, MultinomialNB

# set a prior to be the average for each hero
# sorted by hero_id
prior = pd.value_counts(y).div(len(y)).sort_index().values

clf = MultinomialNB(fit_prior=True, class_prior=prior)
clf.fit(X, y)


Out[397]:
MultinomialNB(alpha=1.0,
       class_prior=array([ 0.05976,  0.03683,  0.00452,  0.02536,  0.05386,  0.02988,
        0.00591,  0.00104,  0.04204,  0.04448,  0.01286,  0.00104,
        0.00069,  0.01911,  0.04517,  0.01112,  0.00035,  0.00695,
        0.04864,  0.00035,  0.07679,  0.02085,  0.00035,  0.01042,
        0.01946,  0....    0.00313,  0.02571,  0.02397,  0.03127,  0.01042,  0.00104,
        0.00069,  0.00035,  0.04934]),
       fit_prior=True)

In [398]:
print("Score {}".format(clf.score(X, y)))


Score 0.045170257123002086

In [399]:
fig, ax = plt.subplots(figsize=(8, 10))

actual = pd.value_counts(y)
expected = pd.value_counts(clf.predict(X))
cts = pd.concat([actual, expected], axis=1, keys=['actual', 'predicted']).fillna(0)
cts.plot(kind='barh', ax=ax)


Out[399]:
<matplotlib.axes._subplots.AxesSubplot at 0x1140d8590>

In [400]:
# Let's try CV

from sklearn.cross_validation import train_test_split, KFold

In [401]:
kf = KFold(len(y), n_folds=10)

In [402]:
scores = []

for train_idx, test_idx in kf:
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    
    # don't recompute prior (I think)
    clf = MultinomialNB(fit_prior=True, class_prior=prior)
    clf.fit(X, y)
    scores.append(clf.score(X_test, y_test))

print(np.mean(scores))


0.0451655052265

In [408]:
from sklearn.lda import LDA

scores = []
prior = prior / prior.sum()

for train_idx, test_idx in kf:
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    
    # don't recompute prior (I think)
    clf = LDA(len(np.unique(y)), priors=prior)
    clf.fit(X, y)
    scores.append(clf.score(X_test, y_test))

print(np.mean(scores))


0.0997568234611

Predict the $n^{th}$ Pick or Ban

The predictors are now

  • team
  • opponent
  • $\{ban_i : i < n\}$
  • $\{pick_i : i < n\}$

and the response is the hero_id. The priors should be recomputed each time. Put zero weight on previously picked / banned.


In [370]:
# TODO the classifier will care (but shouldn't) about the order.
# I don't care if NP was banned first or second, just that it was
# banned. I spose I could map {pb up till now} -> Z.
# how many permutations are there? maybe just do the ones seen.......


def make_frame(data, order=0):
    """
    Construct the endog and exog arrays.
    
    Parameters
    ----------
    
    data : DataFrame
    order : int
        item to pick for
    
    Results
    -------
    
    endog : Series
    exog : DataFrame
    """
    def wrapper(df, order):
        """
        apply the 3 functions to each df in g.
        
        Combine into a (1 x d) Series / DataFrame
        """
        hero_id = df[df.order == 0]['hero_id_f'].values[0]
        match_id = df['match_id'].iloc[0]
        team_id = h.pb_team_id(df, order=order)
        opp_id = h.pb_opponent_id(df, order=order)
        previous_pbs = h.pb_previous_pbs(df, order=order)
        previous_pbs.index = [match_id]
        temp = pd.DataFrame({'hero_id_f': hero_id, 'team_id': team_id,
                             'opp_id': opp_id}, index=[match_id])
        res = pd.concat([temp, previous_pbs], axis=1)
        return res
    
    g = df.groupby('match_id', as_index=False)
    res = g.apply(wrapper, order=order)
    
    # ensurce column order
    cols = res.columns
    sub = ['hero_id_f', 'team_id', 'opp_id']
    res = res[sub + (cols - sub).tolist()]
    return res

In [10]:
df19 = make_frame(df, order=19)

In [349]:
pool = df.hero_id_f.unique()

def available_heros(df, pool):
    """
    Find the heros that have not been seen already.
    

    Parameters
    ----------
    df : DataFrame
        index is match_id. mathes previous pbs on cols starting
        with pb_

    Returns
    -------
    
    unseen : DataFrame
        index is the match_ids from ``df``. Columns are ...
    """
    
    cols = [x for x in df.columns if x.startswith('pb_')]
    sub = df[cols]
    p = [np.setdiff1d(pool, x[1].values) for x in sub.iterrows()]
    res = pd.DataFrame(p, index=sub.index)
    return res

def unavailable_heros(df):
    """
    Find the heros that have been seen already.
    

    Parameters
    ----------
    df : DataFrame
        index is match_id. mathes previous pbs on cols starting
        with pb_

    Returns
    -------
    
    seen : dict
        {match_id : [seen hero_id_f]}
    """    
    cols = [x for x in df.columns if x.startswith('pb_')]
    sub = df[cols]
    return sub.T.to_dict(outtype='list')


def hero_priors(full_df, feature_df):
    # get baseline priors off freqencies
    # everyhitg is in terms of the factorized hero ids.
    # then just set seen guys to zero
    # and spread their mass over the unseen?
    pool = full_df.hero_id_f.unique()  # should be sorted anyway
    pool.sort()

    # want to get a [len(feature_df) x len(pool)] array of priors
    baseline = pd.value_counts(full_df.hero_id_f, normalize=True,
                               sort=False).sort_index()
    baseline = np.tile(baseline, (len(feature_df), 1))
    baseline = pd.DataFrame(baseline, index=feature_df.index,
                            columns=pool)
    seen = unavailable_heros(feature_df)

    updated = baseline.copy()
    # just set to zero now and renormalize later.
    for k, v in baseline.iterrows():
        z_idx = v.index[v.index.isin(seen[k])]
        updated.loc[k, z_idx] = 0
    updated = updated.div(updated.sum(1), axis=0)  # renormalize
    return updated

In [350]:
priors = hero_priors(df, df19)

In [351]:
priors


Out[351]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
347983846 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.014865 ...
348022248 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.030571 0.000000 0.000000 0.042446 0.000000 0.000000 0.031973 0.000000 0.019914 0.000000 0.002988 0.006188 0.000000 0.014942 ...
348136127 0.000000 0.000000 0.003817 0.000000 0.000000 0.000000 0.000000 0.039389 0.002075 0.044396 0.000000 0.000000 0.000000 0.000000 0.020829 0.000000 0.003126 0.006473 0.000000 0.015628 ...
348204084 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.040795 0.002149 0.045981 0.000000 0.009425 0.000000 0.000000 0.000000 0.000000 0.003237 0.006704 0.000000 0.016186 ...
348502094 0.000000 0.000000 0.000000 0.031733 0.000000 0.000000 0.000000 0.000000 0.001944 0.000000 0.043399 0.008530 0.000000 0.032925 0.019522 0.000000 0.002930 0.006067 0.000000 0.000000 ...
348564710 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.002106 0.000000 0.047000 0.009237 0.000000 0.035657 0.021142 0.000000 0.003173 0.006570 0.000000 0.015863 ...
348565633 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.038591 0.002033 0.043496 0.045366 0.008916 0.032764 0.000000 0.020407 0.000000 0.003062 0.006341 0.051138 0.000000 ...
348626099 0.000000 0.000000 0.000000 0.033894 0.000000 0.000000 0.000000 0.039432 0.002077 0.000000 0.046354 0.009110 0.000000 0.000000 0.020851 0.000000 0.003129 0.006480 0.000000 0.000000 ...
348640372 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.030778 0.037914 0.001997 0.000000 0.000000 0.000000 0.032189 0.033813 0.020048 0.000000 0.003009 0.006230 0.050241 0.015043 ...
348699445 0.000000 0.000000 0.000000 0.034306 0.000000 0.000000 0.000000 0.000000 0.002102 0.000000 0.046918 0.009221 0.033885 0.035595 0.021105 0.000000 0.003167 0.006558 0.000000 0.000000 ...
348705839 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.002239 0.000000 0.049976 0.009822 0.036094 0.037915 0.022480 0.000000 0.003374 0.006986 0.056335 0.000000 ...
348777773 0.000000 0.000000 0.000000 0.037226 0.000000 0.000000 0.000000 0.000000 0.002281 0.000000 0.000000 0.010006 0.000000 0.000000 0.022901 0.000000 0.003437 0.007117 0.000000 0.000000 ...
348821532 0.000000 0.000000 0.000000 0.031967 0.000000 0.000000 0.030191 0.000000 0.001959 0.000000 0.000000 0.008593 0.000000 0.033169 0.019666 0.041761 0.002951 0.006111 0.049283 0.014756 ...
348850314 0.000000 0.000000 0.000000 0.034595 0.000000 0.000000 0.032673 0.000000 0.002120 0.000000 0.000000 0.009299 0.034171 0.000000 0.021283 0.000000 0.003194 0.006614 0.000000 0.015969 ...
349136807 0.000000 0.000000 0.000000 0.000000 0.000000 0.017457 0.000000 0.000000 0.002197 0.000000 0.000000 0.000000 0.000000 0.037198 0.000000 0.000000 0.003310 0.006854 0.000000 0.016549 ...
349184797 0.000000 0.000000 0.000000 0.000000 0.000000 0.016310 0.000000 0.000000 0.002052 0.000000 0.000000 0.009003 0.000000 0.034755 0.020606 0.000000 0.000000 0.006404 0.051639 0.015462 ...
349299444 0.033924 0.000000 0.003520 0.031221 0.000000 0.000000 0.029486 0.000000 0.000000 0.040939 0.000000 0.008392 0.000000 0.032394 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ...
349373435 0.000000 0.000000 0.003538 0.000000 0.000000 0.000000 0.029641 0.036513 0.000000 0.041154 0.000000 0.000000 0.000000 0.000000 0.000000 0.041000 0.002897 0.006000 0.048385 0.014487 ...
349705691 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.032107 0.000000 0.002083 0.044577 0.000000 0.009138 0.000000 0.035273 0.020914 0.000000 0.003138 0.006499 0.052409 0.000000 ...
349799957 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.031650 0.000000 0.002053 0.043943 0.000000 0.009008 0.033101 0.034771 0.020616 0.000000 0.003094 0.006407 0.000000 0.000000 ...
349804537 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.029798 0.036707 0.001933 0.041372 0.043151 0.008481 0.000000 0.032737 0.019410 0.000000 0.000000 0.006032 0.000000 0.000000 ...
349889085 0.000000 0.000000 0.000000 0.000000 0.000000 0.016221 0.000000 0.000000 0.002041 0.000000 0.000000 0.008954 0.032905 0.034565 0.020494 0.000000 0.003075 0.006369 0.051358 0.015377 ...
349893280 0.000000 0.000000 0.000000 0.033630 0.000000 0.000000 0.000000 0.000000 0.002061 0.044098 0.045994 0.009039 0.000000 0.000000 0.020689 0.000000 0.003105 0.000000 0.000000 0.015524 ...
349975468 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.002043 0.000000 0.000000 0.008960 0.032928 0.034589 0.020508 0.000000 0.003078 0.006373 0.000000 0.015388 ...
349979765 0.000000 0.000000 0.000000 0.032550 0.000000 0.000000 0.000000 0.000000 0.001994 0.000000 0.044517 0.008749 0.032151 0.033773 0.020024 0.042522 0.003005 0.006223 0.000000 0.000000 ...
350022949 0.000000 0.042257 0.000000 0.000000 0.000000 0.016259 0.000000 0.000000 0.002046 0.000000 0.045667 0.008975 0.032981 0.000000 0.020542 0.000000 0.003083 0.006384 0.051477 0.000000 ...
350026348 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.035697 0.001880 0.040235 0.041964 0.008247 0.000000 0.031837 0.018876 0.040084 0.002833 0.005866 0.000000 0.014164 ...
350067532 0.000000 0.000000 0.000000 0.000000 0.000000 0.017258 0.000000 0.000000 0.002172 0.046475 0.000000 0.000000 0.035008 0.000000 0.021804 0.000000 0.003272 0.006776 0.000000 0.016360 ...
350095199 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.032417 0.000000 0.002103 0.000000 0.000000 0.009226 0.033904 0.035614 0.021116 0.000000 0.003169 0.006562 0.052916 0.000000 ...
350114068 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.038970 0.002052 0.043923 0.000000 0.009004 0.000000 0.034755 0.020607 0.000000 0.003092 0.006404 0.000000 0.000000 ...
350145981 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.032152 0.039606 0.002086 0.000000 0.000000 0.009151 0.000000 0.035323 0.020943 0.000000 0.003143 0.006508 0.000000 0.015715 ...
350517339 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.001988 0.042536 0.000000 0.008719 0.000000 0.033658 0.019956 0.000000 0.002995 0.006201 0.050009 0.014974 ...
350673921 0.000000 0.000000 0.003599 0.000000 0.000000 0.000000 0.030145 0.000000 0.001956 0.041854 0.000000 0.008579 0.031527 0.033118 0.019636 0.000000 0.000000 0.006102 0.000000 0.014733 ...
350745123 0.000000 0.000000 0.003747 0.000000 0.000000 0.000000 0.000000 0.038661 0.002036 0.043575 0.000000 0.000000 0.032824 0.034480 0.000000 0.000000 0.003068 0.006353 0.051231 0.015340 ...
350746270 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.036057 0.001899 0.040640 0.000000 0.000000 0.030613 0.032158 0.019067 0.000000 0.000000 0.005925 0.000000 0.014306 ...
351186580 0.000000 0.000000 0.003648 0.000000 0.000000 0.000000 0.000000 0.037641 0.001983 0.000000 0.044249 0.008697 0.000000 0.033570 0.000000 0.000000 0.002987 0.000000 0.049880 0.000000 ...
351359792 0.000000 0.000000 0.004010 0.035567 0.000000 0.000000 0.000000 0.000000 0.002179 0.046638 0.000000 0.009560 0.000000 0.000000 0.021881 0.000000 0.003284 0.006800 0.000000 0.000000 ...
351397858 0.000000 0.000000 0.003801 0.000000 0.000000 0.000000 0.031843 0.039225 0.002066 0.000000 0.000000 0.009063 0.000000 0.000000 0.020742 0.000000 0.003113 0.006446 0.051979 0.015563 ...
351978945 0.000000 0.000000 0.000000 0.000000 0.000000 0.016530 0.000000 0.000000 0.002080 0.044514 0.000000 0.000000 0.000000 0.035223 0.020884 0.000000 0.003134 0.006490 0.000000 0.000000 ...
352063575 0.000000 0.000000 0.000000 0.031636 0.000000 0.015404 0.000000 0.036805 0.001938 0.000000 0.000000 0.000000 0.000000 0.032825 0.019462 0.000000 0.000000 0.006048 0.000000 0.014603 ...
352197688 0.000000 0.000000 0.000000 0.031216 0.000000 0.000000 0.000000 0.036316 0.000000 0.000000 0.000000 0.000000 0.000000 0.032389 0.019204 0.000000 0.002882 0.005968 0.000000 0.014409 ...
352270980 0.000000 0.000000 0.003646 0.000000 0.000000 0.015747 0.000000 0.037624 0.001982 0.000000 0.000000 0.000000 0.000000 0.033555 0.019895 0.000000 0.000000 0.000000 0.049857 0.014928 ...
352285297 0.000000 0.000000 0.000000 0.000000 0.000000 0.016508 0.000000 0.000000 0.002077 0.044456 0.000000 0.000000 0.000000 0.035177 0.020857 0.000000 0.003130 0.006481 0.000000 0.015650 ...
352393093 0.000000 0.000000 0.003553 0.000000 0.035689 0.000000 0.000000 0.000000 0.001931 0.000000 0.000000 0.000000 0.031132 0.032702 0.000000 0.000000 0.002910 0.006025 0.048590 0.014549 ...
352594789 0.000000 0.000000 0.000000 0.032903 0.000000 0.000000 0.031075 0.000000 0.002016 0.000000 0.045000 0.000000 0.000000 0.034140 0.020242 0.000000 0.003038 0.006290 0.050726 0.000000 ...
352599230 0.000000 0.000000 0.000000 0.036123 0.000000 0.000000 0.000000 0.000000 0.002213 0.000000 0.049404 0.009710 0.000000 0.000000 0.022223 0.000000 0.003335 0.006906 0.000000 0.016675 ...
352661817 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.037646 0.001983 0.000000 0.044255 0.008698 0.000000 0.033575 0.019907 0.000000 0.002987 0.006186 0.000000 0.000000 ...
352676457 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.033589 0.041376 0.002179 0.046635 0.000000 0.009560 0.000000 0.036901 0.021879 0.000000 0.003283 0.006799 0.000000 0.000000 ...
352686986 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.002140 0.000000 0.047767 0.009388 0.034499 0.000000 0.021487 0.000000 0.003224 0.006677 0.000000 0.016122 ...
352731534 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.028863 0.035555 0.001873 0.000000 0.041797 0.000000 0.000000 0.031710 0.018801 0.000000 0.002821 0.005843 0.047115 0.000000 ...
352733158 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.002054 0.043955 0.000000 0.009010 0.033110 0.034780 0.020622 0.000000 0.003095 0.006408 0.000000 0.015473 ...
352807549 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.029831 0.036747 0.001935 0.000000 0.000000 0.008490 0.000000 0.032773 0.019432 0.000000 0.002916 0.006039 0.048696 0.000000 ...
352817378 0.000000 0.000000 0.000000 0.033722 0.000000 0.000000 0.000000 0.000000 0.002066 0.044219 0.046120 0.009064 0.000000 0.000000 0.000000 0.000000 0.003113 0.000000 0.000000 0.015566 ...
352863524 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.002096 0.000000 0.046778 0.009194 0.000000 0.035489 0.021042 0.000000 0.003158 0.006539 0.052730 0.000000 ...
352891432 0.000000 0.000000 0.000000 0.032797 0.000000 0.000000 0.030975 0.000000 0.002010 0.000000 0.044855 0.008816 0.000000 0.034030 0.020177 0.000000 0.003028 0.006270 0.000000 0.000000 ...
352893672 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.030214 0.000000 0.001960 0.000000 0.043753 0.008599 0.000000 0.000000 0.019681 0.000000 0.002953 0.006116 0.049320 0.014767 ...
352916974 0.000000 0.000000 0.000000 0.035303 0.000000 0.000000 0.033342 0.000000 0.002163 0.000000 0.048282 0.009489 0.000000 0.000000 0.021718 0.000000 0.003259 0.006749 0.000000 0.000000 ...
353004114 0.000000 0.000000 0.000000 0.033465 0.000000 0.000000 0.000000 0.000000 0.002051 0.043881 0.000000 0.008995 0.000000 0.034722 0.000000 0.000000 0.003089 0.006398 0.000000 0.015447 ...
353025100 0.000000 0.000000 0.000000 0.000000 0.000000 0.016131 0.000000 0.038542 0.002030 0.043441 0.000000 0.008905 0.000000 0.034374 0.000000 0.000000 0.003058 0.006333 0.000000 0.015292 ...
353045827 0.000000 0.000000 0.000000 0.000000 0.000000 0.017354 0.000000 0.000000 0.002184 0.046734 0.000000 0.009580 0.000000 0.000000 0.000000 0.046560 0.003290 0.006814 0.000000 0.016452 ...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...

2878 rows × 104 columns


In [459]:
df5 = make_frame(df, order=5)
priors = hero_priors(df, df5)

In [490]:
y = df5.hero_id_f.values.ravel()
X = df5[['team_id', 'opp_id']].values#, 'pb_0', 'pb_1', 'pb_2', 'pb_3', 'pb_4']].values

In [455]:
### hrmf
priors = priors.sum(0).div(priors.sum(0).sum())
priors = priors[priors.index.isin(np.unique(y))]
priors = priors / priors.sum()

In [469]:
# meh
#
clf = LDA(priors=priors.values)
clf.fit(X, y)
clf.score(X, y)


Out[469]:
0.023627519110493399

In [ ]:
scores = []

for train_idx, test_idx in kf:
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    
    # don't recompute prior (I think)
    clf = MultinomialNB(fit_prior=True, class_prior=prior)
    clf.fit(X, y)
    scores.append(clf.score(X_test, y_test))

print(np.mean(scores))

Trees


In [494]:
from sklearn import tree
scores = []

for train_idx, test_idx in KFold(len(y), n_folds=5):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    clf = tree.DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    scores.append(clf.score(X_test, y_test))

scores


Out[494]:
[0.09375,
 0.11805555555555555,
 0.10069444444444445,
 0.090434782608695655,
 0.095652173913043481]

In [ ]:


In [505]:
clf = tree.DecisionTreeClassifier(max_depth=3)
clf.fit(X_train, y_train)
scores.append(clf.score(X_test, y_test))

with open("tree.dot", 'w') as f:
    f = tree.export_graphviz(clf, out_file=f)

In [506]:
!dot -Tjpg tree.dot -o tree.jpg

In [509]:
from IPython.display import display, display_jpeg, Image
Image('tree.jpg')


Out[509]:

In [500]:
from sklearn.grid_search import GridSearchCV

In [502]:
tree.


Out[502]:
<sklearn.tree._tree.Tree at 0x113ceab28>

In [ ]:
param_grid = {'max_depth': [1, 3, 5, 7, 100]}

GridSearchCV